rm(list=ls())
#' 9017 Online Panels Benchmarking Study SRM Paper
#' Analysis syntax
#'
#' @author  Dina Neiger
#' @version 20220112
#'
#'
#' @input   
#' Analysis file from Step 1
#' 
#' 
#' 
#' @output
#' Input datafile for bootstrap sig testing for substantive variables weighted
#' Weighted substantive tables proportions in Table C2

options(java.parameters = "-Xmx6096m")


library("plyr")
library("xlsx")
library(reshape2)
library("haven")

# Date formats for xlsxssf
options(xlsx.date.format="dd/MM/yyyy")
options(xlsx.datetime.format="dd/MM/yyyy")

# Windows stub
Z_PATH <- "Z:/"

setwd("Z:/Research Papers and Presentations/SRM article/Submission syntax")

WRK_DIR <- setwd("Z:/Research Papers and Presentations/SRM article/Submission syntax")

INP_DIR <- paste0(WRK_DIR,"/Inputs/")
OUT_DIR <- paste0(WRK_DIR, "/Outputs/")

### read var names and labels for significance testing
load(paste0(INP_DIR,"varnames.RData"))
load(paste0(INP_DIR,"catnames.RData"))

### read analysis file
load(paste0(OUT_DIR,"findata.RData"))


### check missing values in the substantive variables
table(is.na(findata$b1),exclude=NULL)
table(is.na(findata$k6cat),exclude=NULL)
table(is.na(findata$b2),exclude=NULL)
table(is.na(findata$d15),exclude=NULL)
table(is.na(findata$b4),exclude=NULL)
table(is.na(findata$b5),exclude=NULL)

###function to prepare variables for calculation of weighted proportions
wprop_prep <- function(varin, propcat, weightvar){
  varout <- as.numeric(weightvar)
  varout[!(varin %in% propcat)] <- 0
  return(varout)
}


### derive substantive variables for significance testing
sigdata <- findata[, c("surtype","surtype.l","weight1","resp_id")]


#weighted proportions for substantive variables
wvarnames <- NULL
for(i in 1:length(varnames)){
    wvarnames[i]<- paste0(varnames[i],"_",catnames[i],"w1")
    sigdata[wvarnames[[i]]] <- 
           wprop_prep(findata[,varnames[[i]]],catnames[i],findata$weight1)
}
wvarnames 

#rebase for k6
table(findata$k6cat,exclude=NULL)
sigdata$k6cat_1w1[is.na(findata$k6cat)] <-NA
table(is.na(sigdata$k6cat_1w1))

#save the file for significance testing
save(sigdata,file = paste0(OUT_DIR, "S2-data4sigtest.RData"))
write.xlsx2(sigdata,paste0(OUT_DIR,"S2-data4sigtest.xlsx", sep=""), row.names = FALSE,showNA=FALSE)

#save varnames for significance testing
varnames_ds <- NULL
varnames_ds <- as.data.frame(cbind(as.character(varnames),as.character(catnames),as.character(wvarnames)),stringsAsFactors=F)
names(varnames_ds) <-c("varname","catname","wvarname")
save(varnames_ds, file = paste0(OUT_DIR, "S2-variable names for sig testing.RData"))


table(is.na(sigdata$k6cat_1w1))

#frequency tables weighted 

x<- ddply(sigdata,.(surtype.l),function(z){
  z<-z[!is.na(z$k6cat_1w1),]
  z$t=round(100*sum(z$k6cat_1w1)/sum(z$weight1),1)
  })

check_prop <- function(x,wvar){
  x<-x[!is.na(x[,wvar]),]
  ddply(x,.(surtype.l),function(z){z$t=round(100*sum(z[,wvar])/sum(z$weight1),2)})
}
x<- check_prop(sigdata,"k6cat_1w1")

# Table C2 Psychological distress weighted estimate
x

# Weighted results for substantive variables
z<-NULL
checkds <- NULL
for(i in 1:length(varnames)){
  z <- check_prop(sigdata, wvarnames[[i]])
  names(z) <- c("surtype.l",paste0("e_",wvarnames[[i]]))
  if(i==1) checkds <- z
  else checkds <- merge(checkds,z,by="surtype.l")
    }

t(checkds)





